Maps of where the Fortune 500 companies - that is the five hundred largest U.S. corporations by total revenue - have their headquarters and how HQ loactions are related to state tax rate.

get Fortune 500 HQs addresses

Get the addresses of Fortune 500 headquarter addresses from Geography Realm.

fortune <- read.csv("data/fortune1000.csv")
names(fortune) <- c("latitude", "longitude", "company", "rank", "years_on_list", "city", "state")
fortune <- fortune %>% filter(rank <= 500)

Fortune 500 Companies by State

Aggregate the number of headquarters by state. Make a map in which the states are shaded by their number of Fortune 500 companies.

library(openintro)
fortune_by_state <- fortune %>% 
  mutate(state_name = abbr2state(state)) %>% 
  group_by(state_name, state) %>%
  tally() 

library(albersusa)
state_fortune <- usa_sf("laea") %>%
  left_join(fortune_by_state, c("name"="state_name"))

A choropleth map made with ggplot2.

statesToLabel <- c("NY", "CA", "TX")

ggplot(state_fortune) + 
  geom_sf(aes(fill = n),
          color = "gray95", size = 0.25, alpha = 0.9) +
  scale_fill_gradient(low="darksalmon", high="brown4", na.value="gray80") + 
  
  geom_sf_text(aes(label = paste0(state,'\n',n)), 
               data = filter(state_fortune, state %in% statesToLabel),
               color = "white", size = 3, lineheight = .75, fontface = "bold") + 
  
  labs(title = "NY, CA, & TX: States with the most Fortune 500 company headquarters",
       subtitle = "Number of Fortune 500 company headquarters by state in 2018",
       caption = "Source: Geography Realm") + 
  
  theme_map() +
  
  theme(
    legend.position="right", 
    legend.title=element_blank(),
    plot.subtitle=element_text(size=10, color="grey40", face="italic"),
    plot.caption=element_text(size=7, color="grey40")
    )

A Cartogram heatmap using “statebins” package.

library(statebins)

state_fortune <- state_fortune %>% 
  mutate(cnt_group = case_when(
    is.na(n) ~ "0",
    n < 10 ~ "< 10",
    n >=10 & n <20 ~ "10-20",
    n >=20 & n <40 ~ "20-40",
    n >=40 ~ "> 40"
  )) 

state_fortune$cnt_group <- factor(state_fortune$cnt_group, levels = c("> 40","20-40","10-20","< 10","0"))


statebins(state_fortune, state_col = "name", value_col = "cnt_group", 
          #palette = "YlOrBr", direction=1,
          ggplot2_scale_function = scale_fill_manual,
          values = c("#993404","#d95f0e","#fe9929","#fed98e","#ffffd4"),
          round=TRUE,
          name = ""
          ) +
  labs(title = "Number of Fortune 500 company headquarters by state in 2018") +
  theme_statebins(
    legend_position = "right"
    )

Use Leaflet for R to create an interactive map.

library(leaflet)
library(geojsonio)

states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>% 
  left_join(fortune_by_state, by = c("NAME"="state_name")) %>%
  replace_na(list(n=0))
  
bins <- c(0, 1, 10, 20, 40, 60)
pal <- colorBin("YlOrBr", domain = state_fortune$n, bins = bins)

popupcontent <- sprintf("<strong>%s</strong><br/>%s",
                        states@data$NAME,
                        ifelse(is.na(states@data$n),"",paste(states@data$n, "headquarters"))
                        ) %>%
    lapply(htmltools::HTML)

m <- leaflet(states, options = leafletOptions(minZoom = 3)) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
  addPolygons(
    fillColor = ~pal(n),
    stroke = FALSE, 
    smoothFactor = 0.5, 
    fillOpacity = 0.7,
    label = popupcontent,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
      )
    ) %>%
  addPopups(
    lng = -73.9855, lat = 42.7580,  
    paste("NY has", states@data %>% filter(NAME == "New York") %>% select(n) %>% as.numeric(),"Fortune500 headquarters"),
    options = popupOptions(closeButton = FALSE)
  ) %>%
  addControl("Number of Fortune 500 Company Headquarters", position = "topright") %>%
  addLegend(colors = c("#993404","#d95f0e","#fe9929","#fed98e","#ffffd4"),
            labels = c("> 40","20-40","10-20","< 10","none"),
            opacity = 0.7, title = NULL,  position = "bottomright")

m
#library(htmlwidgets)
#saveWidget(widget = m,
#           file = "fortune500_by_state.html",
#           selfcontained = TRUE)

Is there evidence of companies being headquartered in low tax states?

Download state corporate income tax rates and extract top tax rate at each state. Merge to sp data.

read_excel("data\\state_corporate_income_tax_rates_2018.xlsx", sheet = "Sheet1") %>%
  select(State, Rates) %>%
  group_by(State) %>%
  slice(which.max(Rates)) %>%
  write.csv("data\\state_corporate_income_tax_rates_2018.csv", row.names = F)
library("readxl")
corptax <- read_excel("data\\state_corporate_income_top_tax_rates_2018.xlsx") 

states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>% 
  left_join(corptax, by = c("NAME"="state"))

An interactive map shaded by top corporate income tax rates.

Thanks to [@mpriem89](https://github.com/rstudio/leaflet/issues/256) for creating the addLegend_decreasing function that enables order legend decreasingly.

pal <- colorNumeric(palette = "YlOrBr", domain = corptax$topcorpinctax)

popupcontent <- sprintf("<strong>%s</strong><br/>%s",
                        states@data$NAME,
                        paste0("Top Corporate Income Tax: ",round(states@data$topcorpinctax*100,2),"%")
                        ) %>%
    lapply(htmltools::HTML)


# Credit to @mpriem89 (https://github.com/rstudio/leaflet/issues/256) for this neat function
addLegend_decreasing <- function (map, position = c("topright", "bottomright", "bottomleft", 
                "topleft"), pal, values, na.label = "NA", bins = 7, colors, 
          opacity = 0.5, labels = NULL, labFormat = labelFormat(), 
          title = NULL, className = "info legend", layerId = NULL, 
          group = NULL, data = getMapData(map), decreasing = FALSE) {
    position <- match.arg(position)
    type <- "unknown"
    na.color <- NULL
    extra <- NULL
    if (!missing(pal)) {
        if (!missing(colors)) 
            stop("You must provide either 'pal' or 'colors' (not both)")
        if (missing(title) && inherits(values, "formula")) 
            title <- deparse(values[[2]])
        values <- evalFormula(values, data)
        type <- attr(pal, "colorType", exact = TRUE)
        args <- attr(pal, "colorArgs", exact = TRUE)
        na.color <- args$na.color
        if (!is.null(na.color) && col2rgb(na.color, alpha = TRUE)[[4]] == 0) {
            na.color <- NULL
        }
        if (type != "numeric" && !missing(bins)) 
            warning("'bins' is ignored because the palette type is not numeric")
        if (type == "numeric") {
            cuts <- if (length(bins) == 1) 
                pretty(values, bins)
            else bins   
            
            if (length(bins) > 2) 
                if (!all(abs(diff(bins, differences = 2)) <= sqrt(.Machine$double.eps))) 
                    stop("The vector of breaks 'bins' must be equally spaced")
            n <- length(cuts)
            r <- range(values, na.rm = TRUE)
            cuts <- cuts[cuts >= r[1] & cuts <= r[2]]
            n <- length(cuts)
            p <- (cuts - r[1])/(r[2] - r[1])
            extra <- list(p_1 = p[1], p_n = p[n])
            p <- c("", paste0(100 * p, "%"), "")
            if (decreasing == TRUE){
                colors <- pal(rev(c(r[1], cuts, r[2])))
                labels <- rev(labFormat(type = "numeric", cuts))
            }else{
                colors <- pal(c(r[1], cuts, r[2]))
                labels <- rev(labFormat(type = "numeric", cuts))
            }
            colors <- paste(colors, p, sep = " ", collapse = ", ")
            
        }
        else if (type == "bin") {
            cuts <- args$bins
            n <- length(cuts)
            mids <- (cuts[-1] + cuts[-n])/2
            if (decreasing == TRUE){
                colors <- pal(rev(mids))
                labels <- rev(labFormat(type = "bin", cuts))
            }else{
                colors <- pal(mids)
                labels <- labFormat(type = "bin", cuts)
            }
            
        }
        else if (type == "quantile") {
            p <- args$probs
            n <- length(p)
            cuts <- quantile(values, probs = p, na.rm = TRUE)
            mids <- quantile(values, probs = (p[-1] + p[-n])/2, 
                 na.rm = TRUE)
            if (decreasing == TRUE){
                colors <- pal(rev(mids))
                labels <- rev(labFormat(type = "quantile", cuts, p))
            }else{
                colors <- pal(mids)
                labels <- labFormat(type = "quantile", cuts, p)
            }
        }
        else if (type == "factor") {
            v <- sort(unique(na.omit(values)))
            colors <- pal(v)
            labels <- labFormat(type = "factor", v)
            if (decreasing == TRUE){
                colors <- pal(rev(v))
                labels <- rev(labFormat(type = "factor", v))
            }else{
                colors <- pal(v)
                labels <- labFormat(type = "factor", v)
            }
        }
        else stop("Palette function not supported")
        if (!any(is.na(values))) 
            na.color <- NULL
    }
    else {
        if (length(colors) != length(labels)) 
            stop("'colors' and 'labels' must be of the same length")
    }
    legend <- list(colors = I(unname(colors)), labels = I(unname(labels)), 
                   na_color = na.color, na_label = na.label, opacity = opacity, 
                   position = position, type = type, title = title, extra = extra, 
                   layerId = layerId, className = className, group = group)
    invokeMethod(map, data, "addLegend", legend)
}

leaflet(states, options = leafletOptions(minZoom = 3)) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
  addPolygons(
    fillColor = ~pal(topcorpinctax),
    stroke = FALSE, 
    smoothFactor = 0.5, 
    fillOpacity = 0.7,
    label = popupcontent,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
      )
    ) %>%
  addControl("State Top Corporate Income Tax Rates", position = "topright") %>%
  addLegend_decreasing(pal = pal, 
                       values = ~topcorpinctax, 
                       position = "bottomright", 
                       title = "Tax Rate",
                       opacity = 1,
                       decreasing = TRUE,
                       labFormat = labelFormat(
                       transform = function(x) 100 * x,
                       suffix = "%"))

A scatter plot estimating the relationship between corporate income tax rates and number of headquarters.

tax_hq <- corptax %>%
  left_join(fortune_by_state, by = c("state"="state_name")) %>%
  select(state, topcorpinctax, n) %>%
  replace_na(list(n = 0))
ggplot(tax_hq, aes(x = topcorpinctax, y = n)) +
  geom_point() + 
  #geom_smooth(method="loess" , color="gray50", fill="wheat", se=T, alpha = 0.3) +
  geom_text(data = tax_hq %>% filter(state %in% c("New York", "California", "Texas", "Iowa")),
            aes(label = state),
            hjust = 0, nudge_x = 0.001) + 
  
  scale_x_continuous(labels = scales::percent) + 
  ylim(0, NA) +
  
  labs(title = "No evidence of companies being headquartered in low tax states",
       subtitle = "Top corporate income tax rate and number of Fortune 500 headquarters",
       caption = "Year: 2018") + 
  xlab("top corporate income tax rate") + 
  ylab("number of headquarters") + 
  
  theme_hc() +
  theme(legend.position="none", 
        plot.subtitle=element_text(size=10, color="grey40", face="italic"),
        plot.caption=element_text(size=7, color="grey40"),
        axis.title.x=element_text(size=10, color="grey40"),
        axis.title.y=element_text(size=10, color="grey40"))

use HQs per capita instead

To account for state population, calculate number of HQs per 1000,000 people.

Population data (estimate, July 2019) on state populations is imported from Wikipedia.

library(rvest)

url <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population"

state_population <- url %>%
  html() %>%
  html_node("table") %>%
  html_table(fill = TRUE)

state_population <- state_population[, 3:4]
  
state_population <- state_population %>%
  slice(2:n()) %>%
  select(state = State, population = "Census population") %>%
  mutate(population = as.numeric(gsub(",","",population))) %>%
  right_join(corptax) %>%
  left_join(fortune_by_state, by = c("state"="state_name")) %>%
  replace_na(list(n=0)) %>%
  select(state, population, topcorpinctax, n) %>%
  mutate(hq.per.capita = n/population*1000000) # number of HQs per million people

re-plot the previous scatter plot with HQs per capita data

ggplot(state_population, aes(x = topcorpinctax, y = hq.per.capita)) +
  geom_point() + 
  #geom_smooth(method="loess" , color="gray50", fill="wheat", se=T, alpha = 0.3) +
  geom_text(data = state_population %>% filter(state %in% c("New York", "California", "Texas", "Iowa")),
            aes(label = state),
            hjust = 0, nudge_x = 0.001, 
            color = "gray60") + 
  
  scale_x_continuous(labels = scales::percent) + 
  ylim(0,NA) +
  
  labs(title = "No evidence of companies being headquartered in low tax states",
       subtitle = "Top corporate income tax rate and number of Fortune 500 headquarters per million people",
       caption = "Year: 2018") + 
  xlab("top corporate income tax rate") + 
  ylab("number of headquarters per million people") + 
  
  theme_hc() +
  theme(legend.position="none", 
        plot.subtitle=element_text(size=10, color="grey40", face="italic"),
        plot.caption=element_text(size=7, color="grey40"),
        axis.title.x=element_text(size=10, color="grey40"),
        axis.title.y=element_text(size=10, color="grey40"))

map locations of headquarters

Add locations of all headquarters to the state map shaded by tax rates. Size the points by the ranking on the Fortune 500 list.

states <- geojsonio::geojson_read("data/gz_2010_us_040_00_500k.json", what = "sp")
states@data <- states@data %>% 
  left_join(corptax, by = c("NAME"="state"))
pal <- colorNumeric(palette = "YlOrBr", domain = corptax$topcorpinctax)

popupPolygons <- sprintf(
  "<strong>%s</strong><br/>%s",
  states@data$NAME,
  paste0("Top Corp Income Tax: ",round(states@data$topcorpinctax*100,2),"%")
                        ) %>% 
  lapply(htmltools::HTML)

fortune <- fortune %>%
  left_join(corptax %>% mutate(state_abb = state2abbr(state)), by = c("state"="state_abb"))

popupCircle <- sprintf(
  "<strong>%s</strong><br/>%s<br/>%s<br/>%s",
  fortune$company,
  paste0("Rank: ",fortune$rank),
  paste0(fortune$city,", ",fortune$state),
  paste0("Top Corp Income Tax: ",round(fortune$topcorpinctax*100,2),"%")
) %>% 
  lapply(htmltools::HTML)

leaflet(states, options = leafletOptions(minZoom = 3)) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  setView(lng = -99.8421, lat = 39.3647, zoom = 3) %>%
  addPolygons(
    fillColor = ~pal(topcorpinctax),
    stroke = FALSE, 
    smoothFactor = 0.5, 
    fillOpacity = 0.7,
    label = popupPolygons
    ) %>%
  addCircleMarkers(
    lng = fortune$longitude, lat = fortune$latitude,
    radius = 20/fortune$rank,
    color = "#00A4CC",
    stroke = FALSE, fillOpacity = 0.5,
    label = popupCircle
  ) %>%
  addControl("Fortune 500 HQs and State Top Corporate Income Tax Rates", position = "topright") %>%
  addLegend_decreasing(pal = pal, 
                       values = ~topcorpinctax, 
                       position = "bottomright", 
                       title = "Tax Rate",
                       opacity = 1,
                       decreasing = TRUE,
                       labFormat = labelFormat(
                       transform = function(x) 100 * x,
                       suffix = "%"))